* Session setup: clear the Results window and turn off -more- paging so the script runs without pausing.
cls
set more off
* Project folders (machine-specific absolute paths).
* lf   : working folder; series-id lookup files are read from it and all intermediate
*        datasets and exported CSV files are written to it
global lf "C:\Users\jed\Dropbox\research\bls jobs report\labor force estimates"
* jobs : folder holding the BLS series datasets that the script reads
global jobs "C:\Users\jed\Dropbox\research\bls jobs report"
* cps  : population-control folder; the Census import paths further down point inside this folder
global cps "C:\Users\jed\Dropbox\research\cps pop control"

* Add C:\x13sam to the ado search path
* (presumably where the sax13 commands used below are installed -- confirm).
adopath + "C:\x13sam"


* Payroll/household ratio.
* Compares two household-survey employment measures:
*   LNS12000000 = unadjusted
*   LNS16000000 = adjusted to match the payroll survey concept
* Background: https://www.bls.gov/web/empsit/ces_cps_trends.htm
* The HP trend of the ratio is saved here and used further down to adjust the breakeven rate.

use "$jobs\household survey series", clear
keep if series_id == "LNS12000000" | series_id == "LNS16000000"

* one row per month, one column per series (column names = series ids)
reshape wide value, i(year period date) j(series_id) string
rename value* *

generate ratio = LNS16000000 / LNS12000000

* HP filter with the default smoothing parameter; keep both the cycle and the trend
tsset date, monthly
tsfilter hp ratio_cyc = ratio, trend(payroll_HH_trend)

* visual check of the raw ratio against its trend
twoway (line ratio payroll_HH_trend date)

save "$lf\payroll_HH_trend", replace

* Payroll concept in the household survey.
* For inspection only: exports the listed series so the divergence between
* LNS12000000 and LNS16000000 can be examined outside Stata.
use "$jobs\household survey series", clear

* flag the series of interest, then keep only those rows
generate byte wanted = 0
foreach id in LNU02000000 LNU02034560 LNU02032192 LNU02032193 LNU02032190 LNU02044495 LNU02044497 {
    replace wanted = 1 if series_id == "`id'"
}
keep if wanted
drop wanted

* one row per month, one column per series (column names = series ids)
reshape wide value, i(year period date) j(series_id) string
rename value* *

outsheet using "$lf\payroll_concept_components.csv", comma replace




* Age-adjusted EPOP.
* Age-specific actual EPOP trends, to be weighted by the 2024 population distribution.
* This part builds a long dataset with one row per month and age group holding
* pop (the LNU00 series) and epop (the LNU02 series divided by the LNU00 series).

use "$jobs\household survey series", clear
merge m:1 series_id using "$lf\seriesid for epop", keep(matched) nogen

* one row per month, one column per series (column names = series ids)
reshape wide value, i(year period date) j(series_id) string
rename value* *

* Parallel lists: the k-th age label goes with the k-th six-digit series suffix.
* LNU02<suffix> is the numerator and LNU00<suffix> the denominator of each EPOP.
local ages  16 18 20 25 30 35 40 45 50 55 60 65 70 75
local codes 000086 000088 000036 024932 024933 024934 024935 024936 024937 000094 000096 024938 024941 024942
local n_groups : word count `ages'

forvalues k = 1/`n_groups' {
    local a : word `k' of `ages'
    local c : word `k' of `codes'
    generate epop`a' = LNU02`c' / LNU00`c'
    rename LNU00`c' pop`a'
}

keep year period date pop* epop*

* stack the age groups: the numeric suffix becomes the variable age
reshape long pop epop, i(year period date) j(age)

* Fixed weights: average 2024 population of each age group, saved to disk so it
* can be merged back onto every month of the age-specific EPOP series.
preserve
keep if year == 2024
collapse (mean) fixedpop = pop, by(age)
save "$lf\age distribution", replace
restore

* weighted average of the age-specific EPOPs for each month, using the 2024 weights
merge m:1 age using "$lf\age distribution"
collapse (mean) epop [aweight = fixedpop], by(year period date)

* Seasonally adjust the final series with the sax13 commands:
* run the adjustment, import the s11 table (which arrives as epop_s11), then clean up.
tsset date, monthly
sax13 epop
sax13im epop, ext(s11)
sax13del epop

* keep only the adjusted series and write it out as both .dta and .csv
keep year date epop_s11
save "$lf\adjusted epop", replace
outsheet using "$lf\adjusted epop.csv", comma replace




* For inspection only: put the age-adjusted EPOP next to the published prime-age EPOP
* (series LNS12300060). Nothing is saved; the merged data is left in memory.
use "$jobs\household survey series", clear
drop if series_id != "LNS12300060"
merge 1:1 date using "$lf\adjusted epop"

* divide the published value by 100 so it is on the same scale as epop_s11 (a ratio)
generate epop_pub = value / 100






* Main breakeven calculation.

* Step 1: average 2024 EPOP for each age group, saved as one row per age group.
use "$jobs\household survey series", clear
merge m:1 series_id using "$lf\seriesid_for_breakeven", keep(matched) nogen

* one row per month, one column per series (column names = series ids)
reshape wide value, i(year period date) j(series_id) string
rename value* *

* Groups that map to a single published series: LNU02<suffix> / LNU00<suffix>.
local ages  16 25 35 45 55
local codes 024887 000089 000091 000093 000095
local n_single : word count `ages'

forvalues k = 1/`n_single' {
    local a : word `k' of `ages'
    local c : word `k' of `codes'
    generate epop`a' = LNU02`c' / LNU00`c'
    rename LNU00`c' pop`a'
}

* The 65 group pools two published series (suffixes 024938 and 024941).
generate epop65 = (LNU02024938 + LNU02024941) / (LNU00024938 + LNU00024941)
generate pop65 = LNU00024938 + LNU00024941

generate epop75 = LNU02024942 / LNU00024942
rename LNU00024942 pop75

keep year period date pop* epop*

* 2024 average of each age-specific EPOP, then stack into long form (age, epop)
keep if year == 2024
collapse (mean) epop*
generate row = 1
reshape long epop, i(row) j(age)
drop row

save "$lf\epop_by_age_2024", replace



* Step 2: create the age distribution over time from Census population estimates, latest vintages.
* Step 2a: vintage 2024 estimates for April 2020 - December 2025.
*
* Each of the 12 monthly files is reduced to total population by year, month and age group,
* and the pieces are stacked into estimates_group_total_2020s.dta in the lf folder.
*
* The first file overwrites any existing output and later files are appended to it.
* A plain append (no capture) is used on purpose: if an append fails, the script stops
* instead of saving only the current file over everything accumulated so far.

local n_done 0

foreach i in 01 02 03 04 05 06 07 08 09 10 11 12 {
    * files live inside the population-control folder held in the cps global
    import delimited "$cps\NC EST2024 ALLDATA N files\nc-est2024-alldata-n-file`i'.csv", encoding(ISO-8859-9) clear
    keep month year age tot_pop

    * age 999 is presumably the all-ages total row -- confirm against the Census file layout
    drop if age==999

    rename tot_pop pop

    * age groups labelled by their lower bound: 0, 16, 25, 35, 45, 55, 65, 75
    egen ageg=cut(age), at(0, 16, 25, 35, 45, 55, 65, 75, 999)
    collapse (sum) pop, by(year month ageg)

    * these are the vintage 2024 files
    gen vintage=2024

    * month code 4.2 is treated as April; any 4.1 rows are left as they are
    recode month (4.2=4)

    if `n_done' > 0 {
        append using "$lf\estimates_group_total_2020s.dta"
    }
    save "$lf\estimates_group_total_2020s.dta", replace
    local ++n_done
}


* Step 2b: vintage 2020 estimates for April 2010 - December 2021.
*
* Each of the 24 monthly files is reduced to total population by year, month and age group,
* and the pieces are stacked into estimates_group_total_2010s.dta in the lf folder.
*
* The first file overwrites any existing output and later files are appended to it.
* A plain append (no capture) is used on purpose: if an append fails, the script stops
* instead of saving only the current file over everything accumulated so far.

local n_done 0

foreach i in 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 {
    * files live inside the population-control folder held in the cps global
    import delimited "$cps\NC EST2020 ALLDATA N files\NC-EST2020-ALLDATA-N-File`i'.csv", encoding(ISO-8859-9) clear
    keep month year age tot_pop

    * force: any non-numeric tot_pop entries become missing rather than stopping the script
    destring tot_pop, force replace

    * age 999 is presumably the all-ages total row -- confirm against the Census file layout
    drop if age==999

    rename tot_pop pop

    * age groups labelled by their lower bound: 0, 16, 25, 35, 45, 55, 65, 75
    egen ageg=cut(age), at(0, 16, 25, 35, 45, 55, 65, 75, 999)
    collapse (sum) pop, by(year month ageg)

    gen vintage=2020

    * month code 4.2 is treated as April; 4.1 rows are handled later, in step 2c
    recode month (4.2=4)

    if `n_done' > 0 {
        append using "$lf\estimates_group_total_2010s.dta"
    }
    save "$lf\estimates_group_total_2010s.dta", replace
    local ++n_done
}


* Step 2c: wedge the 2010s estimates to the 2020s estimates.
* Monthly dates used below: 603 = ym(2010, 4) = April 2010, 723 = ym(2020, 4) = April 2020.

* Part 1: size of the adjustment, by age group.
* Take the vintage 2020 file, drop the month 4.1 rows, and line it up with the vintage 2024 file.
use "$lf\estimates_group_total_2010s.dta", clear
recode month (4.2=4) (4.1=.)
drop if month==.
rename pop pop_vin20
merge 1:1 year month ageg using "$lf\estimates_group_total_2020s.dta"

* difference in the April 2020 estimate between the vintage 2024 estimates for 2020-2025
* and the vintage 2020 estimates for 2010-2021
gen date=ym(year, month)
keep if date==723
gen adjust=pop-pop_vin20
keep ageg adjust

* Part 2: attach the adjustment to every month of the vintage 2020 series.
merge 1:m ageg using "$lf\estimates_group_total_2010s.dta", nogen
recode month (4.2=4) (4.1=.)
drop if month==.
rename pop pop_vin20
gen date=ym(year, month)

* Pivot the April 2010 - April 2020 estimates so the April 2020 estimate from vintage 2020
* matches the April 2020 estimate from vintage 2024: the share of adjust that is added
* grows linearly from 0 at April 2010 to the full amount 120 months later.
gen double pop_wedged=pop_vin20+adjust*(date-603)/120

* bring in the vintage 2024 series (variable pop) and refresh date for the newly added rows
merge 1:1 year month ageg using "$lf\estimates_group_total_2020s.dta", nogen
replace date=ym(year, month)

* visual check for one age group; the grouping variable is ageg (there is no variable
* named age at this point, so the full name is spelled out rather than abbreviated)
twoway line pop* date if ageg==35

* final series: wedged vintage 2020 values before April 2020, vintage 2024 values from then on
replace pop=pop_wedged if date<723
drop if date<603

* At this point the data in memory is the full series of population estimates
* by age group, 2010 to 2025.
*
* Attach the 2024 EPOP of each age group, then derive three things:
*   1. the pop-weighted average EPOP per month   -> saved as epop_fixed_age_effect
*   2. total population aged 16+ and its monthly change -> saved as adult_pop
*   3. the fixed-EPOP level of employment implied by the population estimates -> left in memory
keep year month date ageg pop
rename ageg age
merge m:1 age using "$lf\epop_by_age_2024"

* employment implied by each age group (2024 EPOP times population);
* computed up front, it is simply discarded by the two collapses below
generate emp = epop * pop

* 1. population-weighted mean EPOP by month
preserve
collapse (mean) epop [aweight = pop], by(year month date)
save "$lf/epop_fixed_age_effect", replace
restore

* 2. adult (16+) population by month and its one-month change
preserve
collapse (sum) pop if age >= 16, by(year month date)
tsset date, monthly
generate rate = pop - L.pop
save "$lf\adult_pop", replace
restore

* 3. total implied employment by month
collapse (sum) emp, by(year month date)


* Adjust the employment series by the payroll/household trend factor calculated earlier,
* which reflects the payroll-concept definition of employment in the household survey.
* pemp is the resulting adjusted series; both pemp and emp are divided by 1000.
merge 1:1 date using "$lf\payroll_HH_trend", keep(matched)
generate pemp = emp * payroll_HH_trend / 1000
replace emp = emp / 1000

* Seasonally adjust both series with the sax13 commands:
* run the adjustment, import the s11 table (as <series>_s11), then clean up.
foreach v in pemp emp {
    tsset date, monthly
    sax13 `v'
    sax13im `v', ext(s11)
    sax13del `v'
}

* One-month changes of the adjusted series, then an HP trend with a low smoothing value.
tsset date, monthly
foreach v in pemp emp {
    generate ch1_`v' = `v'_s11 - L1.`v'_s11
}
foreach v in pemp emp {
    tsfilter hp ch1_`v'_cycle = ch1_`v', trend(ch1_`v'_trend) smooth(10)
}

* visual checks: each change series against its trend, then the two trends together
foreach v in pemp emp {
    twoway line ch1_`v' ch1_`v'_trend date
}
twoway line ch1*trend date

* the smoothed payroll-concept change is the breakeven series that gets saved
keep date ch1_pemp_trend
save "$lf\breakeven_simple", replace




* Combine the breakeven level with actuals from the payroll survey.
* Series CES0000000001 is kept from date 603 (April 2010 in Stata monthly dates) onward.
use "$jobs\working jobs file", clear
keep if series_id == "CES0000000001" & date >= 603
rename value ces
keep year date ces

* one-month change in the payroll series
tsset date, monthly
generate ces1mo = D.ces

merge 1:1 date using "$lf\breakeven_simple"

* visual check: actual monthly change against the breakeven trend, 2023 onward
twoway line ces1mo ch1_pemp_trend date if year>=2023

* daily-date copy of the monthly date, exported alongside it for use in spreadsheets
generate excel_date = dofm(date)
format excel_date %td

outsheet date excel_date year ces1mo ch1_pemp_trend using "$lf\actual vs breakeven.csv", comma replace


* Annual averages of monthly payroll growth (ces1mo) and of the breakeven trend
* (ch1_pemp_trend), computed from the monthly data currently in memory, plus the gap
* between them (actual minus breakeven).
collapse (mean) ces1mo ch1_pemp_trend, by(year)

* the variable name is spelled out in full so this does not depend on variable-name abbreviation
gen gap=ces1mo-ch1_pemp_trend

outsheet using "$lf\annual actual breakeven.csv", comma replace

